library(ggplot2)

Evaluating all models on dataset: Gorilla Test

# Ground-truth file for Gorilla Test; its `y` column is used by the fits below.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
questions <- read.csv("data/gorilla_test_continuous.csv", sep = "\t")

Evaluate 00_bag_of_words on Gorilla Test

# Load the 00_bag_of_words predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/00_bag_of_words_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05660 -0.03318 -0.02829  0.02295  0.88373 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.028293   0.003242   8.727  < 2e-16 ***
## questions$y 0.028306   0.008437   3.355 0.000845 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0574 on 586 degrees of freedom
## Multiple R-squared:  0.01885,    Adjusted R-squared:  0.01717 
## F-statistic: 11.26 on 1 and 586 DF,  p-value: 0.0008445
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on Gorilla Test

# Load the 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 predictions for
# Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24806 -0.08598 -0.01575  0.08159  0.51494 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.290288   0.006659  43.592  < 2e-16 ***
## questions$y 0.104806   0.017330   6.048 2.62e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1179 on 586 degrees of freedom
## Multiple R-squared:  0.05875,    Adjusted R-squared:  0.05714 
## F-statistic: 36.58 on 1 and 586 DF,  p-value: 2.618e-09
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on Gorilla Test

# Load the 02_harmony_distiluse-base-multilingual-cased-v2 predictions for
# Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26998 -0.09691 -0.01865  0.08191  0.65351 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.17920    0.00741  24.182  < 2e-16 ***
## questions$y  0.11730    0.01929   6.082 2.14e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1312 on 586 degrees of freedom
## Multiple R-squared:  0.05938,    Adjusted R-squared:  0.05778 
## F-statistic:    37 on 1 and 586 DF,  p-value: 2.137e-09
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on Gorilla Test

# Load the 03_harmony_stsb-xlm-r-multilingual predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/03_harmony_stsb-xlm-r-multilingual_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34730 -0.08803 -0.00613  0.09131  0.48369 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.323896   0.007581  42.722  < 2e-16 ***
## questions$y 0.119809   0.019730   6.072 2.27e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1342 on 586 degrees of freedom
## Multiple R-squared:  0.0592, Adjusted R-squared:  0.0576 
## F-statistic: 36.87 on 1 and 586 DF,  p-value: 2.265e-09
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on Gorilla Test

# Load the 04_harmony_paraphrase-multilingual-mpnet-base-v2 predictions for
# Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28904 -0.08231 -0.00774  0.07656  0.49546 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.304029   0.006401  47.495  < 2e-16 ***
## questions$y 0.133727   0.016658   8.028 5.46e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1133 on 586 degrees of freedom
## Multiple R-squared:  0.09907,    Adjusted R-squared:  0.09754 
## F-statistic: 64.44 on 1 and 586 DF,  p-value: 5.459e-15
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on Gorilla Test

# Load the 05_harmony_all-mpnet-base-v2 predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/05_harmony_all-mpnet-base-v2_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27184 -0.07691 -0.00316  0.06680  0.46247 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.220283   0.006029  36.537   <2e-16 ***
## questions$y 0.134042   0.015690   8.543   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1068 on 586 degrees of freedom
## Multiple R-squared:  0.1108, Adjusted R-squared:  0.1092 
## F-statistic: 72.99 on 1 and 586 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on Gorilla Test

# Load the 10_jose predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/10_jose_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26459 -0.08976 -0.01490  0.08245  0.48942 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.161909   0.007257   22.31   <2e-16 ***
## questions$y 0.205255   0.018885   10.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1285 on 586 degrees of freedom
## Multiple R-squared:  0.1678, Adjusted R-squared:  0.1664 
## F-statistic: 118.1 on 1 and 586 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on Gorilla Test

# Load the 11_raafi predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/11_raafi_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31973 -0.10799 -0.01814  0.09352  0.50840 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.151555   0.007839   19.33   <2e-16 ***
## questions$y 0.227928   0.020401   11.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1388 on 586 degrees of freedom
## Multiple R-squared:  0.1756, Adjusted R-squared:  0.1742 
## F-statistic: 124.8 on 1 and 586 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on Gorilla Test

# Load the 20_openai_text-embedding-ada-002 predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/20_openai_text-embedding-ada-002_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.064331 -0.022306 -0.004568  0.018511  0.165382 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.778004   0.001788 435.046  < 2e-16 ***
## questions$y 0.038280   0.004654   8.225 1.26e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03167 on 586 degrees of freedom
## Multiple R-squared:  0.1035, Adjusted R-squared:  0.102 
## F-statistic: 67.66 on 1 and 586 DF,  p-value: 1.26e-15
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on Gorilla Test

# Load the 21_openai_text-embedding-3-large predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/21_openai_text-embedding-3-large_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22722 -0.06424 -0.00892  0.05395  0.60928 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.226149   0.005228  43.258   <2e-16 ***
## questions$y 0.130319   0.013605   9.579   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09257 on 586 degrees of freedom
## Multiple R-squared:  0.1354, Adjusted R-squared:  0.1339 
## F-statistic: 91.76 on 1 and 586 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on Gorilla Test

# Load the 40_google_vertex_ai_gecko predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/40_google_vertex_ai_gecko_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.127154 -0.029805 -0.002913  0.030491  0.172600 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.755533   0.002473  305.48  < 2e-16 ***
## questions$y 0.035719   0.006436    5.55 4.34e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04379 on 586 degrees of freedom
## Multiple R-squared:  0.04993,    Adjusted R-squared:  0.04831 
## F-statistic:  30.8 on 1 and 586 DF,  p-value: 4.344e-08
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on Gorilla Test

# Load the 41_google_vertex_ai_gecko_multilingual predictions for Gorilla Test.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/41_google_vertex_ai_gecko_multilingual_model_on_gorilla_test_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.071753 -0.016015  0.000942  0.017731  0.091082 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.853988   0.001439 593.317  < 2e-16 ***
## questions$y 0.026230   0.003746   7.003 6.91e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02549 on 586 degrees of freedom
## Multiple R-squared:  0.07722,    Adjusted R-squared:  0.07564 
## F-statistic: 49.04 on 1 and 586 DF,  p-value: 6.91e-12
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Test: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluating all models on dataset: Gorilla Train

# Ground-truth file for Gorilla Train; its `y` column is used by the fits below.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
questions <- read.csv("data/gorilla_train_continuous.csv", sep = "\t")

Evaluate 00_bag_of_words on Gorilla Train

# Load the 00_bag_of_words predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/00_bag_of_words_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04638 -0.03354 -0.03053  0.02411  0.38695 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.030531   0.001304  23.406  < 2e-16 ***
## questions$y 0.015849   0.003431   4.619 4.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0458 on 2349 degrees of freedom
## Multiple R-squared:  0.009,  Adjusted R-squared:  0.008578 
## F-statistic: 21.33 on 1 and 2349 DF,  p-value: 4.068e-06
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on Gorilla Train

# Load the 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 predictions for
# Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29258 -0.08957 -0.01122  0.07723  0.55788 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.294845   0.003465   85.09   <2e-16 ***
## questions$y 0.117966   0.009115   12.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1217 on 2349 degrees of freedom
## Multiple R-squared:  0.06656,    Adjusted R-squared:  0.06616 
## F-statistic: 167.5 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on Gorilla Train

# Load the 02_harmony_distiluse-base-multilingual-cased-v2 predictions for
# Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28036 -0.10809 -0.02037  0.08638  0.62556 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.18286    0.00392   46.65   <2e-16 ***
## questions$y  0.11835    0.01031   11.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1376 on 2349 degrees of freedom
## Multiple R-squared:  0.05311,    Adjusted R-squared:  0.05271 
## F-statistic: 131.8 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on Gorilla Train

# Load the 03_harmony_stsb-xlm-r-multilingual predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/03_harmony_stsb-xlm-r-multilingual_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38340 -0.09545 -0.00419  0.09316  0.54619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.339511   0.003985  85.196   <2e-16 ***
## questions$y 0.092303   0.010483   8.805   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1399 on 2349 degrees of freedom
## Multiple R-squared:  0.03195,    Adjusted R-squared:  0.03154 
## F-statistic: 77.53 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on Gorilla Train

# Load the 04_harmony_paraphrase-multilingual-mpnet-base-v2 predictions for
# Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31752 -0.08669 -0.00906  0.07643  0.48324 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.311959   0.003406   91.59   <2e-16 ***
## questions$y 0.130843   0.008959   14.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1196 on 2349 degrees of freedom
## Multiple R-squared:  0.08324,    Adjusted R-squared:  0.08285 
## F-statistic: 213.3 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on Gorilla Train

# Load the 05_harmony_all-mpnet-base-v2 predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/05_harmony_all-mpnet-base-v2_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30851 -0.08115 -0.01038  0.06955  0.48684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.228753   0.003278   69.78   <2e-16 ***
## questions$y 0.133164   0.008624   15.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1151 on 2349 degrees of freedom
## Multiple R-squared:  0.09215,    Adjusted R-squared:  0.09177 
## F-statistic: 238.4 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on Gorilla Train

# Load the 10_jose predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/10_jose_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32708 -0.08633 -0.01909  0.07063  0.51640 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.137731   0.003534   38.98   <2e-16 ***
## questions$y 0.305037   0.009296   32.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1241 on 2349 degrees of freedom
## Multiple R-squared:  0.3143, Adjusted R-squared:  0.314 
## F-statistic:  1077 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on Gorilla Train

# Load the 11_raafi predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/11_raafi_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31143 -0.07633 -0.01795  0.07008  0.54754 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.096383   0.003245   29.70   <2e-16 ***
## questions$y 0.427096   0.008537   50.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.114 on 2349 degrees of freedom
## Multiple R-squared:  0.5159, Adjusted R-squared:  0.5157 
## F-statistic:  2503 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on Gorilla Train

# Load the 20_openai_text-embedding-ada-002 predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/20_openai_text-embedding-ada-002_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.084409 -0.024983 -0.004173  0.021639  0.140441 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.7787366  0.0009703  802.61   <2e-16 ***
## questions$y 0.0424163  0.0025523   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03407 on 2349 degrees of freedom
## Multiple R-squared:  0.1052, Adjusted R-squared:  0.1048 
## F-statistic: 276.2 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on Gorilla Train

# Load the 21_openai_text-embedding-3-large predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/21_openai_text-embedding-3-large_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24942 -0.07476 -0.00977  0.06011  0.55280 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.229794   0.002858   80.41   <2e-16 ***
## questions$y 0.140477   0.007517   18.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1003 on 2349 degrees of freedom
## Multiple R-squared:  0.1294, Adjusted R-squared:  0.1291 
## F-statistic: 349.2 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on Gorilla Train

# Load the 40_google_vertex_ai_gecko predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/40_google_vertex_ai_gecko_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.137242 -0.032080 -0.001186  0.032612  0.159676 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.755104   0.001295  582.96   <2e-16 ***
## questions$y 0.043516   0.003407   12.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04548 on 2349 degrees of freedom
## Multiple R-squared:  0.06493,    Adjusted R-squared:  0.06453 
## F-statistic: 163.1 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on Gorilla Train

# Load the 41_google_vertex_ai_gecko_multilingual predictions for Gorilla Train.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/41_google_vertex_ai_gecko_multilingual_model_on_gorilla_train_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076036 -0.018178 -0.000117  0.018602  0.096765 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.855249   0.000750 1140.34   <2e-16 ***
## questions$y 0.025297   0.001973   12.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02634 on 2349 degrees of freedom
## Multiple R-squared:  0.06541,    Adjusted R-squared:  0.06502 
## F-statistic: 164.4 on 1 and 2349 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: Gorilla Train: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluating all models on dataset: McElroy 2024 Cosine Correlation

# Ground-truth file for McElroy 2024 Cosine Correlation; its `y` column is used
# by the fits below.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
questions <- read.csv(
  "data/mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)

Evaluate 00_bag_of_words on McElroy 2024 Cosine Correlation

# Load the 00_bag_of_words predictions for McElroy 2024 Cosine Correlation.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/00_bag_of_words_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08143 -0.05149 -0.00142  0.03111  0.35602 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.03422    0.01947  -1.758   0.0792 .  
## questions$y  0.13502    0.03001   4.498 7.95e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0614 on 739 degrees of freedom
## Multiple R-squared:  0.02665,    Adjusted R-squared:  0.02534 
## F-statistic: 20.24 on 1 and 739 DF,  p-value: 7.949e-06
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n00_bag_of_words\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 on McElroy 2024 Cosine Correlation

# Load the 01_harmony_paraphrase-multilingual-MiniLM-L12-v2 predictions for
# McElroy 2024 Cosine Correlation.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/01_harmony_paraphrase-multilingual-MiniLM-L12-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31863 -0.07984  0.00036  0.06857  0.44146 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.19429    0.03743  -5.191  2.7e-07 ***
## questions$y  0.90291    0.05770  15.647  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.118 on 739 degrees of freedom
## Multiple R-squared:  0.2489, Adjusted R-squared:  0.2478 
## F-statistic: 244.8 on 1 and 739 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n01_harmony_paraphrase-multilingual-MiniLM-L12-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 02_harmony_distiluse-base-multilingual-cased-v2 on McElroy 2024 Cosine Correlation

# Load the 02_harmony_distiluse-base-multilingual-cased-v2 predictions for
# McElroy 2024 Cosine Correlation.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/02_harmony_distiluse-base-multilingual-cased-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29003 -0.08933 -0.01011  0.07986  0.51659 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.03942    0.04051  -0.973    0.331    
## questions$y  0.63494    0.06246  10.165   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1278 on 739 degrees of freedom
## Multiple R-squared:  0.1227, Adjusted R-squared:  0.1215 
## F-statistic: 103.3 on 1 and 739 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n02_harmony_distiluse-base-multilingual-cased-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 03_harmony_stsb-xlm-r-multilingual on McElroy 2024 Cosine Correlation

# Load the 03_harmony_stsb-xlm-r-multilingual predictions for McElroy 2024
# Cosine Correlation.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/03_harmony_stsb-xlm-r-multilingual_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34905 -0.08641 -0.00505  0.08387  0.42588 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.12061    0.04084  -2.954  0.00324 ** 
## questions$y  0.82406    0.06296  13.089  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1288 on 739 degrees of freedom
## Multiple R-squared:  0.1882, Adjusted R-squared:  0.1871 
## F-statistic: 171.3 on 1 and 739 DF,  p-value: < 2.2e-16
# Predictions against ground truth, with a fitted linear trend line.
ggplot(results, aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n03_harmony_stsb-xlm-r-multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 04_harmony_paraphrase-multilingual-mpnet-base-v2 on McElroy 2024 Cosine Correlation

# Load the 04_harmony_paraphrase-multilingual-mpnet-base-v2 predictions for
# McElroy 2024 Cosine Correlation.
# Tab-delimited despite the .csv name (assumed — TODO confirm). read.csv()
# only accepts a one-character `sep`, so the tab is written as "\t".
results <- read.csv(
  "output/04_harmony_paraphrase-multilingual-mpnet-base-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv",
  sep = "\t"
)
# Ground truth comes from `questions`, paired with the predictions by row.
results$y <- questions$y
# Regress the predictions on the ground truth and print the fit.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30249 -0.07517 -0.00465  0.07130  0.37057 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.19729    0.03615  -5.458 6.57e-08 ***
## questions$y  0.92976    0.05573  16.684  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.114 on 739 degrees of freedom
## Multiple R-squared:  0.2736, Adjusted R-squared:  0.2726 
## F-statistic: 278.4 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n04_harmony_paraphrase-multilingual-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 05_harmony_all-mpnet-base-v2 on McElroy 2024 Cosine Correlation

# Load the predictions of 05_harmony_all-mpnet-base-v2. The separator is
# spelled as the "\t" escape so it cannot be turned into spaces by whitespace
# reformatting.
# NOTE(review): the predictions file is presumably tab-delimited — confirm
# against the script that writes it.
results <- read.csv("output/05_harmony_all-mpnet-base-v2_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27683 -0.07964 -0.00329  0.07186  0.39762 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.33308    0.03534  -9.425   <2e-16 ***
## questions$y  1.02645    0.05449  18.839   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1115 on 739 degrees of freedom
## Multiple R-squared:  0.3244, Adjusted R-squared:  0.3235 
## F-statistic: 354.9 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n05_harmony_all-mpnet-base-v2\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 10_jose on McElroy 2024 Cosine Correlation

# Load the predictions of 10_jose. The separator is spelled as the "\t"
# escape: a run of several spaces is not a valid `sep` (read.csv() requires a
# single-byte separator), and the escape cannot be altered by whitespace
# reformatting the way a literal tab can.
results <- read.csv("output/10_jose_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33345 -0.09822 -0.01273  0.09032  0.39168 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.42997    0.04220  -10.19   <2e-16 ***
## questions$y  1.17787    0.06507   18.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1331 on 739 degrees of freedom
## Multiple R-squared:  0.3072, Adjusted R-squared:  0.3063 
## F-statistic: 327.7 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n10_jose\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 11_raafi on McElroy 2024 Cosine Correlation

# Load the predictions of 11_raafi. The separator is spelled as the "\t"
# escape so it cannot be turned into spaces by whitespace reformatting.
# NOTE(review): the predictions file is presumably tab-delimited — confirm
# against the script that writes it.
results <- read.csv("output/11_raafi_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38330 -0.10893 -0.00846  0.09813  0.46741 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.35469    0.04667  -7.599 9.04e-14 ***
## questions$y  1.03463    0.07196  14.378  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1472 on 739 degrees of freedom
## Multiple R-squared:  0.2186, Adjusted R-squared:  0.2175 
## F-statistic: 206.7 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n11_raafi\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 20_openai_text-embedding-ada-002 on McElroy 2024 Cosine Correlation

# Load the predictions of 20_openai_text-embedding-ada-002. The separator is
# spelled as the "\t" escape so it cannot be turned into spaces by whitespace
# reformatting.
# NOTE(review): the predictions file is presumably tab-delimited — confirm
# against the script that writes it.
results <- read.csv("output/20_openai_text-embedding-ada-002_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.088251 -0.018355 -0.000054  0.016758  0.096320 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.653339   0.008471   77.12   <2e-16 ***
## questions$y 0.275569   0.013060   21.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02672 on 739 degrees of freedom
## Multiple R-squared:  0.3759, Adjusted R-squared:  0.3751 
## F-statistic: 445.2 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n20_openai_text-embedding-ada-002\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 21_openai_text-embedding-3-large on McElroy 2024 Cosine Correlation

# Load the predictions of 21_openai_text-embedding-3-large. The separator is
# spelled as the "\t" escape so it cannot be turned into spaces by whitespace
# reformatting.
# NOTE(review): the predictions file is presumably tab-delimited — confirm
# against the script that writes it.
results <- read.csv("output/21_openai_text-embedding-3-large_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20141 -0.05750 -0.00543  0.04633  0.37589 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.13862    0.02705  -5.124 3.82e-07 ***
## questions$y  0.82011    0.04171  19.664  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08531 on 739 degrees of freedom
## Multiple R-squared:  0.3435, Adjusted R-squared:  0.3426 
## F-statistic: 386.7 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n21_openai_text-embedding-3-large\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 40_google_vertex_ai_gecko on McElroy 2024 Cosine Correlation

# Load the predictions of 40_google_vertex_ai_gecko. The separator is spelled
# as the "\t" escape: a run of several spaces is not a valid `sep`
# (read.csv() requires a single-byte separator), and the escape cannot be
# altered by whitespace reformatting the way a literal tab can.
results <- read.csv("output/40_google_vertex_ai_gecko_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.094162 -0.022628 -0.000911  0.019889  0.116381 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.61764    0.01047   59.01   <2e-16 ***
## questions$y  0.30045    0.01614   18.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03301 on 739 degrees of freedom
## Multiple R-squared:  0.3193, Adjusted R-squared:  0.3184 
## F-statistic: 346.6 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n40_google_vertex_ai_gecko\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'

Evaluate 41_google_vertex_ai_gecko_multilingual on McElroy 2024 Cosine Correlation

# Load the predictions of 41_google_vertex_ai_gecko_multilingual. The
# separator is spelled as the "\t" escape: a run of several spaces is not a
# valid `sep` (read.csv() requires a single-byte separator), and the escape
# cannot be altered by whitespace reformatting the way a literal tab can.
results <- read.csv("output/41_google_vertex_ai_gecko_multilingual_model_on_mcelroy_2024_cosine_correlation_continuous.csv", sep = "\t")
# Copy the ground-truth column next to the predictions.
# NOTE(review): assumes rows of `results` and `questions` are in the same
# order; nothing here verifies that.
results$y <- questions$y
# Regress predictions on ground truth. The formula keeps the `$` form so the
# printed Call and coefficient labels are unchanged.
model <- lm(results$y_pred ~ questions$y)
summary(model)
## 
## Call:
## lm(formula = results$y_pred ~ questions$y)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.081837 -0.013984  0.000495  0.014695  0.071735 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.751519   0.007332  102.50   <2e-16 ***
## questions$y 0.204931   0.011304   18.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02312 on 739 degrees of freedom
## Multiple R-squared:  0.3078, Adjusted R-squared:  0.3069 
## F-statistic: 328.6 on 1 and 739 DF,  p-value: < 2.2e-16
# Scatter of model predictions (y_pred) against ground truth (y), with a
# fitted linear trend line drawn on top.
ggplot(data = results, mapping = aes(x = y, y = y_pred)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Dataset: McElroy 2024 Cosine Correlation: predictions of model\n41_google_vertex_ai_gecko_multilingual\nvs ground truth (y)")
## `geom_smooth()` using formula = 'y ~ x'